[GNN] 신용카드거래 사기탐지 – pyod 사용

Author

신록예찬,김보람

Published

January 25, 2024

ref: https://pyod.readthedocs.io/en/latest/pyod.models.html#all-models

1. Imports

import pandas as pd
import numpy as np
from pyod.models.abod import ABOD
#from pyod.models.alad import ALAD
#from pyod.models.anogan import AnoGAN
#from pyod.models.auto_encoder import AutoEncoder
from pyod.models.cblof import CBLOF
from pyod.models.cof import COF
from pyod.models.cd import CD
from pyod.models.copod import COPOD
#from pyod.models.deep_svdd import DeepSVDD
#from pyod.models.dif import DIF
from pyod.models.ecod import ECOD
#from pyod.models.feature_bagging import FeatureBagging
from pyod.models.gmm import GMM
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.inne import INNE
from pyod.models.kde import KDE
from pyod.models.knn import KNN
from pyod.models.kpca import KPCA
from pyod.models.kpca import PyODKernelPCA
from pyod.models.lmdd import LMDD
from pyod.models.loda import LODA
from pyod.models.lof import LOF
from pyod.models.loci import LOCI
#from pyod.models.lunar import LUNAR
from pyod.models.lscp import LSCP
from pyod.models.mad import MAD
from pyod.models.mcd import MCD
#from pyod.models.mo_gaal import MO_GAAL
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.qmcd import QMCD
from pyod.models.rgraph import RGraph
from pyod.models.rod import ROD
from pyod.models.sampling import Sampling
from pyod.models.sod import SOD
#from pyod.models.so_gaal import SO_GAAL
from pyod.models.sos import SOS
#from pyod.models.suod import SUOD
#from pyod.models.vae import VAE
#from pyod.models.xgbod import XGBOD
import warnings
warnings.filterwarnings('ignore')

2. Data

# Toy training set: a single 'amt' feature column with nine rows.
X = pd.Series([5000.0, 1, 2, 3000, 3, 4, 2000, 5, 6], name='amt').to_frame()
# Labels aligned row-by-row with X: 1 marks a fraudulent row, 0 a normal one.
y = pd.Series([1, 0, 0, 1, 0, 0, 1, 0, 0], name='is_fraud').to_frame()
# Two held-out rows that the fitted detectors are scored on.
XX = pd.Series([3500.0, 1.5], name='amt').to_frame()

3. Predictor 만들기

- 레이블이 1인 것의 비율(즉 사기거래 비율)을 계산해서 fraud_ratio를 설정해야 함.

# Expected share of fraudulent transactions, passed to every detector as its
# `contamination` setting. Hard-coded here; it should be set to the fraction
# of rows labelled 1 (fraud) in the training data.
fraud_ratio = 0.33

# Detector name -> pyod detector instance, all sharing the same contamination.
# Disabled entries are kept for reference; the number of leading '#' records
# why the entry is disabled:
#   #    the detector cannot be constructed here (e.g. tensorflow is missing)
#   ##   the detector is constructed but raises an error in .fit
#   ###  .fit succeeds but the test-time prediction comes out as nan
predictors = {
    'ABOD': ABOD(contamination=fraud_ratio),
#    'ALAD': ALAD(contamination=fraud_ratio),
#    'AnoGAN': AnoGAN(contamination=fraud_ratio),
#    'AutoEncoder': AutoEncoder(contamination=fraud_ratio),
##    'CBLOF': CBLOF(contamination=fraud_ratio, n_clusters=2),
##    'COF': COF(contamination=fraud_ratio),
##    'CD': CD(contamination=fraud_ratio),
    'COPOD': COPOD(contamination=fraud_ratio),
#    'DeepSVDD': DeepSVDD(contamination=fraud_ratio),
#    'DIF': DIF(contamination=fraud_ratio),
    'ECOD': ECOD(contamination=fraud_ratio),
#    'FeatureBagging': FeatureBagging(contamination=fraud_ratio),
    'GMM': GMM(contamination=fraud_ratio),
    'HBOS': HBOS(contamination=fraud_ratio),
    'IForest': IForest(contamination=fraud_ratio),
    'INNE': INNE(contamination=fraud_ratio),
    'KDE': KDE(contamination=fraud_ratio),
    'KNN': KNN(contamination=fraud_ratio),
    'KPCA': KPCA(contamination=fraud_ratio),
#    'PyODKernelPCA': PyODKernelPCA(contamination=fraud_ratio),
##    'LMDD': LMDD(contamination=fraud_ratio),
    'LODA': LODA(contamination=fraud_ratio),
    'LOF': LOF(contamination=fraud_ratio),
    'LOCI': LOCI(contamination=fraud_ratio),
#    'LUNAR': LUNAR(contamination=fraud_ratio),
#    'LSCP': LSCP(contamination=fraud_ratio),
    'MAD': MAD(contamination=fraud_ratio),
    'MCD': MCD(contamination=fraud_ratio),
#    'MO_GAAL': MO_GAAL(contamination=fraud_ratio),
    'OCSVM': OCSVM(contamination=fraud_ratio),
    'PCA': PCA(contamination=fraud_ratio),
###    'QMCD': QMCD(contamination=fraud_ratio),
    'RGraph': RGraph(contamination=fraud_ratio),
    'ROD': ROD(contamination=fraud_ratio),
##    'Sampling': Sampling(contamination=fraud_ratio),
##    'SOD': SOD(contamination=fraud_ratio),
#    'SO_GAAL': SO_GAAL(contamination=fraud_ratio),
    'SOS': SOS(contamination=fraud_ratio),
#    'SUOD': SUOD(contamination=fraud_ratio),
#    'VAE': VAE(contamination=fraud_ratio),
#    'XGBOD': XGBOD(contamination=fraud_ratio),
}
  • 주석 처리된 항목은 사용할 수 없는 모형임
  • #은 tensorflow 등이 설치되어 있지 않아서 객체 자체가 만들어지지 않는 것
  • ##은 객체는 만들어지는데 .fit 할 때 오류가 나는 것
  • ###은 fit은 되는데 test할 때 nan이 출력되는 것

4. 학습 & 결과저장

# Fit every detector on the training data, then keep the last column of its
# predict_proba output for the test rows, keyed by detector name.
yyhat_dict = {}
for name, predictor in predictors.items():
    predictor.fit(X, y)
    proba = predictor.predict_proba(XX)
    yyhat_dict[name] = proba[:, -1]
0/9
Test block 0/1
0/11
yyhat_dict # a correct prediction is [1, 0]
{'ABOD': array([1., 0.]),
 'COPOD': array([0.69400716, 0.19400716]),
 'ECOD': array([0.69400716, 0.69400716]),
 'GMM': array([0.34286978, 0.03145675]),
 'HBOS': array([1., 0.]),
 'IForest': array([0.72671218, 0.16035735]),
 'INNE': array([0.22556971, 0.08458866]),
 'KDE': array([1.        , 0.16995667]),
 'KNN': array([6.99379131e-01, 1.00140196e-04]),
 'KPCA': array([1., 1.]),
 'LODA': array([0.58679469, 0.56      ]),
 'LOF': array([0., 0.]),
 'LOCI': array([0., 0.]),
 'MAD': array([0.6996997, 0.0007007]),
 'MCD': array([4.19839700e-01, 2.99759832e-04]),
 'OCSVM': array([1., 0.]),
 'PCA': array([0.18657712, 0.99981342]),
 'RGraph': array([0., 1.]),
 'ROD': array([1.66712365e-04, 6.44434498e-08]),
 'SOS': array([0., 0.])}

Appendix: 환경설정

conda create -n pyod 
conda activate pyod 
conda install -c conda-forge pyod 
!conda env list 
# conda environments:
#
base                     /home/cgb2/anaconda3
pyod                  *  /home/cgb2/anaconda3/envs/pyod
r                        /home/cgb2/anaconda3/envs/r
!conda list 
# packages in environment at /home/cgb2/anaconda3/envs/pyod:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                        main  
_openmp_mutex             5.1                       1_gnu  
aiofiles                  22.1.0             pyhd8ed1ab_0    conda-forge
aiosqlite                 0.19.0             pyhd8ed1ab_0    conda-forge
anyio                     3.7.1              pyhd8ed1ab_0    conda-forge
argon2-cffi               23.1.0             pyhd8ed1ab_0    conda-forge
argon2-cffi-bindings      21.2.0          py311h5eee18b_0  
arrow                     1.3.0              pyhd8ed1ab_0    conda-forge
asttokens                 2.4.1              pyhd8ed1ab_0    conda-forge
attrs                     23.2.0             pyh71513ae_0    conda-forge
babel                     2.14.0             pyhd8ed1ab_0    conda-forge
beautifulsoup4            4.12.3             pyha770c72_0    conda-forge
blas                      1.0                         mkl  
bleach                    6.1.0              pyhd8ed1ab_0    conda-forge
bottleneck                1.3.5           py311hbed6279_0  
brotli                    1.0.9                h5eee18b_7  
brotli-bin                1.0.9                h5eee18b_7  
bzip2                     1.0.8                h7b6447c_0  
ca-certificates           2023.11.17           hbcca054_0    conda-forge
cached-property           1.5.2                hd8ed1ab_1    conda-forge
cached_property           1.5.2              pyha770c72_1    conda-forge
certifi                   2023.11.17         pyhd8ed1ab_0    conda-forge
cffi                      1.16.0          py311h5eee18b_0  
charset-normalizer        3.3.2              pyhd8ed1ab_0    conda-forge
comm                      0.2.1              pyhd8ed1ab_0    conda-forge
contourpy                 1.2.0           py311hdb19cb5_0  
cycler                    0.11.0             pyhd3eb1b0_0  
debugpy                   1.6.7           py311h6a678d5_0  
decorator                 5.1.1              pyhd8ed1ab_0    conda-forge
defusedxml                0.7.1              pyhd8ed1ab_0    conda-forge
entrypoints               0.4                pyhd8ed1ab_0    conda-forge
exceptiongroup            1.2.0              pyhd8ed1ab_2    conda-forge
executing                 2.0.1              pyhd8ed1ab_0    conda-forge
fonttools                 4.25.0             pyhd3eb1b0_0  
fqdn                      1.5.1              pyhd8ed1ab_0    conda-forge
freetype                  2.12.1               h4a9f257_0  
giflib                    5.2.1                h5eee18b_3  
idna                      3.6                pyhd8ed1ab_0    conda-forge
importlib-metadata        7.0.1              pyha770c72_0    conda-forge
importlib_resources       6.1.1              pyhd8ed1ab_0    conda-forge
intel-openmp              2023.1.0         hdb19cb5_46306  
ipykernel                 6.23.1             pyh210e3f2_0    conda-forge
ipython                   8.20.0             pyh707e725_0    conda-forge
ipython_genutils          0.2.0                      py_1    conda-forge
isoduration               20.11.0            pyhd8ed1ab_0    conda-forge
jedi                      0.19.1             pyhd8ed1ab_0    conda-forge
jinja2                    3.1.3              pyhd8ed1ab_0    conda-forge
joblib                    1.2.0           py311h06a4308_0  
jpeg                      9e                   h5eee18b_1  
json5                     0.9.14             pyhd8ed1ab_0    conda-forge
jsonpointer               2.4             py311h38be061_3    conda-forge
jsonschema                4.21.1             pyhd8ed1ab_0    conda-forge
jsonschema-specifications 2023.12.1          pyhd8ed1ab_0    conda-forge
jsonschema-with-format-nongpl 4.21.1             pyhd8ed1ab_0    conda-forge
jupyter_client            7.4.9              pyhd8ed1ab_0    conda-forge
jupyter_core              5.7.1           py311h38be061_0    conda-forge
jupyter_events            0.9.0              pyhd8ed1ab_0    conda-forge
jupyter_server            1.24.0             pyhd8ed1ab_0    conda-forge
jupyter_server_fileid     0.9.1              pyhd8ed1ab_0    conda-forge
jupyter_server_ydoc       0.8.0              pyhd8ed1ab_0    conda-forge
jupyter_ydoc              0.2.4           py311h06a4308_0  
jupyterlab                3.6.7              pyhd8ed1ab_0    conda-forge
jupyterlab_pygments       0.3.0              pyhd8ed1ab_0    conda-forge
jupyterlab_server         2.25.2             pyhd8ed1ab_0    conda-forge
kiwisolver                1.4.4           py311h6a678d5_0  
lcms2                     2.12                 h3be6417_0  
ld_impl_linux-64          2.38                 h1181459_1  
lerc                      3.0                  h295c915_0  
libbrotlicommon           1.0.9                h5eee18b_7  
libbrotlidec              1.0.9                h5eee18b_7  
libbrotlienc              1.0.9                h5eee18b_7  
libdeflate                1.17                 h5eee18b_1  
libffi                    3.4.4                h6a678d5_0  
libgcc-ng                 11.2.0               h1234567_1  
libgfortran-ng            11.2.0               h00389a5_1  
libgfortran5              11.2.0               h1234567_1  
libgomp                   11.2.0               h1234567_1  
libllvm14                 14.0.6               hdb19cb5_3  
libpng                    1.6.39               h5eee18b_0  
libsodium                 1.0.18               h36c2ea0_1    conda-forge
libstdcxx-ng              11.2.0               h1234567_1  
libtiff                   4.5.1                h6a678d5_0  
libuuid                   1.41.5               h5eee18b_0  
libwebp                   1.3.2                h11a3e52_0  
libwebp-base              1.3.2                h5eee18b_0  
llvmlite                  0.41.0          py311he621ea3_0  
lz4-c                     1.9.4                h6a678d5_0  
markupsafe                2.1.3           py311h5eee18b_0  
matplotlib-base           3.8.0           py311ha02d727_0  
matplotlib-inline         0.1.6              pyhd8ed1ab_0    conda-forge
mistune                   3.0.2              pyhd8ed1ab_0    conda-forge
mkl                       2023.1.0         h213fc3f_46344  
mkl-service               2.4.0           py311h5eee18b_1  
mkl_fft                   1.3.8           py311h5eee18b_0  
mkl_random                1.2.4           py311hdb19cb5_0  
munkres                   1.1.4                      py_0  
nbclassic                 1.0.0              pyh8b2e9e2_0    conda-forge
nbclient                  0.8.0              pyhd8ed1ab_0    conda-forge
nbconvert-core            7.14.2             pyhd8ed1ab_0    conda-forge
nbformat                  5.9.2              pyhd8ed1ab_0    conda-forge
ncurses                   6.4                  h6a678d5_0  
nest-asyncio              1.6.0              pyhd8ed1ab_0    conda-forge
notebook                  6.5.6              pyha770c72_0    conda-forge
notebook-shim             0.2.3              pyhd8ed1ab_0    conda-forge
numba                     0.58.1          py311ha02d727_0  
numexpr                   2.8.7           py311h65dcdc2_0  
numpy                     1.26.3          py311h08b1b3b_0  
numpy-base                1.26.3          py311hf175353_0  
openjpeg                  2.4.0                h3ad879b_0  
openssl                   3.0.12               h7f8727e_0  
packaging                 23.1            py311h06a4308_0  
pandas                    2.1.4           py311ha02d727_0  
pandocfilters             1.5.0              pyhd8ed1ab_0    conda-forge
parso                     0.8.3              pyhd8ed1ab_0    conda-forge
patsy                     0.5.3           py311h06a4308_0  
pexpect                   4.9.0              pyhd8ed1ab_0    conda-forge
pickleshare               0.7.5                   py_1003    conda-forge
pillow                    10.0.1          py311ha6cbd5a_0  
pip                       23.3.1          py311h06a4308_0  
pkgutil-resolve-name      1.3.10             pyhd8ed1ab_1    conda-forge
platformdirs              4.1.0              pyhd8ed1ab_0    conda-forge
prometheus_client         0.19.0             pyhd8ed1ab_0    conda-forge
prompt-toolkit            3.0.42             pyha770c72_0    conda-forge
psutil                    5.9.0           py311h5eee18b_0  
ptyprocess                0.7.0              pyhd3deb0d_0    conda-forge
pure_eval                 0.2.2              pyhd8ed1ab_0    conda-forge
pycparser                 2.21               pyhd8ed1ab_0    conda-forge
pygments                  2.17.2             pyhd8ed1ab_0    conda-forge
pyod                      1.1.2              pyhd8ed1ab_0    conda-forge
pyparsing                 3.0.9           py311h06a4308_0  
python                    3.11.7               h955ad1f_0  
python-dateutil           2.8.2              pyhd3eb1b0_0  
python-fastjsonschema     2.19.1             pyhd8ed1ab_0    conda-forge
python-json-logger        2.0.7              pyhd8ed1ab_0    conda-forge
python-tzdata             2023.3             pyhd3eb1b0_0  
python_abi                3.11                    2_cp311    conda-forge
pytz                      2023.3.post1    py311h06a4308_0  
pyyaml                    6.0.1           py311h5eee18b_0  
pyzmq                     23.2.0          py311h6a678d5_0  
readline                  8.2                  h5eee18b_0  
referencing               0.32.1             pyhd8ed1ab_0    conda-forge
requests                  2.31.0             pyhd8ed1ab_0    conda-forge
rfc3339-validator         0.1.4              pyhd8ed1ab_0    conda-forge
rfc3986-validator         0.1.1              pyh9f0ad1d_0    conda-forge
rpds-py                   0.10.6          py311hb02cf49_0  
scikit-learn              1.2.2           py311h6a678d5_1  
scipy                     1.11.4          py311h08b1b3b_0  
send2trash                1.8.2              pyh41d4057_0    conda-forge
setuptools                68.2.2          py311h06a4308_0  
six                       1.16.0             pyhd3eb1b0_1  
sniffio                   1.3.0              pyhd8ed1ab_0    conda-forge
soupsieve                 2.5                pyhd8ed1ab_1    conda-forge
sqlite                    3.41.2               h5eee18b_0  
stack_data                0.6.2              pyhd8ed1ab_0    conda-forge
statsmodels               0.14.0          py311hf4808d0_0  
tbb                       2021.8.0             hdb19cb5_0  
terminado                 0.18.0             pyh0d859eb_0    conda-forge
threadpoolctl             2.2.0              pyh0d69192_0  
tinycss2                  1.2.1              pyhd8ed1ab_0    conda-forge
tk                        8.6.12               h1ccaba5_0  
tomli                     2.0.1              pyhd8ed1ab_0    conda-forge
tornado                   6.3.3           py311h5eee18b_0  
traitlets                 5.14.1             pyhd8ed1ab_0    conda-forge
types-python-dateutil     2.8.19.20240106    pyhd8ed1ab_0    conda-forge
typing-extensions         4.9.0                hd8ed1ab_0    conda-forge
typing_extensions         4.9.0              pyha770c72_0    conda-forge
tzdata                    2023d                h04d1e81_0  
uri-template              1.3.0              pyhd8ed1ab_0    conda-forge
urllib3                   2.1.0           py311h06a4308_0  
wcwidth                   0.2.13             pyhd8ed1ab_0    conda-forge
webcolors                 1.13               pyhd8ed1ab_0    conda-forge
webencodings              0.5.1              pyhd8ed1ab_2    conda-forge
websocket-client          1.7.0              pyhd8ed1ab_0    conda-forge
wheel                     0.41.2          py311h06a4308_0  
xz                        5.4.5                h5eee18b_0  
y-py                      0.5.9           py311h52d8a92_0  
yaml                      0.2.5                h7f98852_2    conda-forge
ypy-websocket             0.8.2           py311h06a4308_0  
zeromq                    4.3.5                h6a678d5_0  
zipp                      3.17.0             pyhd8ed1ab_0    conda-forge
zlib                      1.2.13               h5eee18b_0  
zstd                      1.5.5                hc292b87_0